import os
import time
import mindspore as ms
from mindformers.pipeline import pipeline

# Process-level flags, exported before the MindSpore context and the pipeline
# are created below.
# NOTE(review): the second flag presumably turns off the PagedAttention custom
# kernel -- confirm against the MindSpore release in use.
os.environ.update(
    {
        "RUN_MODE": "predict",
        "MS_INTERNAL_DISABLE_CUSTOM_KERNEL_LIST": "PagedAttention",
    }
)

# Run on Ascend device 0 with mode=0 and the O0 JIT level with inference
# boost switched on.
ms.set_context(
    device_id=0,
    mode=0,
    device_target="Ascend",
    jit_config=dict(jit_level="O0", infer_boost="on"),
)

# Text-generation pipeline shared by the interactive loop further down.
pipeline_task = pipeline(
    task="text_generation",
    model="CMB_AI_Lab/YiZhao-12B-Chat",  # model_path
    framework="ms",
    model_kwargs=dict(use_past=True),
    trust_remote_code=True,
)
# Interactive loop: read one user message, wrap it in the chat template, run
# the pipeline, and print the generated text. The loop ends when stdin is
# closed or the user presses Ctrl-C at the prompt.
while True:
    try:
        input_ = input("模型输入: ")
    except (EOFError, KeyboardInterrupt):
        # End of input (Ctrl-D / exhausted pipe) or Ctrl-C while waiting for
        # the user: leave the loop quietly instead of dying with a traceback.
        # The bare print() moves the shell prompt onto a fresh line.
        print()
        break

    # Chat template: the user text sits between the <|user|> and <|assistant|> markers.
    template = f"[gMASK]<sop><|user|>\n{input_}<|assistant|>"

    pipeline_result = pipeline_task(template, do_sample=False, seed=int(time.time()))
    # Drop the first occurrence of "\n<user text>" from the returned text
    # (presumably the echoed prompt -- confirm against the pipeline's output
    # format) so that mainly the model's reply is shown.
    print(pipeline_result[0]["text_generation_text"][0].replace(f"\n{input_}", "", 1))